import os
# Notebook cell: confirm the IAM dataset folders and annotation lists are present.
os.listdir()
['.ipynb_checkpoints', 'train', 'train_list.txt', 'v1.ipynb', 'val', 'val_list.txt']
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Read the annotation list; one entry per line (format visible below:
# "<path>.png\t<transcription>").  Iterating the file object directly
# avoids materializing an intermediate `contents` list via readlines().
with open('train_list.txt') as f:
    lines_train = [line.strip() for line in f]
print('Number of label in training data:',len(lines_train))
Number of label in training data: 12686
from collections import defaultdict
%%time
d = defaultdict(list)
image_path = os.listdir('train/')

# Build a filename -> label lookup table once, O(m), instead of re-parsing
# every annotation line for every image file (the original nested loop was
# O(n*m) with repeated string splitting and took ~76 s for ~12.7k images).
# Each annotation line looks like:
#   "lines/a01/a01-000u/a01-000u-00.png\t<transcription>"
label_by_filename = {}
for line in lines_train:
    # Basename of the .png inside the 'lines' hierarchy, e.g. 'a01-000u-00.png'.
    name = line.split('lines')[1].split('.png')[0].split('/')[3] + '.png'
    # setdefault keeps the FIRST occurrence, matching the original loop's
    # `break` on the first matching annotation line.
    label_by_filename.setdefault(name, line.split('.png\t')[1])

for fname in image_path:
    if fname in label_by_filename:
        d['image_path'].append('train/' + fname)
        d['label'].append(label_by_filename[fname])

train_df = pd.DataFrame(d)
Wall time: 1min 16s
# Peek at the assembled (image_path, label) table.
train_df.head()
| image_path | label | |
|---|---|---|
| 0 | train/a01-000u-00.png | A MOVE to stop Mr. Gaitskell from |
| 1 | train/a01-000u-01.png | nominating any more Labour life Peers |
| 2 | train/a01-000u-02.png | is to be made at a meeting of Labour |
| 3 | train/a01-000u-03.png | M |
| 4 | train/a01-000u-04.png | put down a resolution on the subject |
# One row per matched training image.
train_df.shape
(12686, 2)
import random
import cv2
def Rand(start, end, num):
    """Return a list of `num` random integers drawn uniformly from [start, end].

    Both bounds are inclusive (random.randint semantics).  Used below to pick
    random rows of train_df for visual inspection.
    """
    return [random.randint(start, end) for _ in range(num)]
rand_num_list = Rand(0, 8000, 100)

# Sharpening kernel (centre-weighted Laplacian).  It is loop-invariant, so
# build it once instead of re-allocating it on every iteration.
kernel_sharpening = np.array([[-1, -1, -1],
                              [-1, 9, -1],
                              [-1, -1, -1]])

# Show each sampled line image next to a sharpened version.
for i in rand_num_list:
    img = cv2.imread(train_df['image_path'][i])
    if img is None:
        # cv2.imread returns None (it does not raise) on a missing or
        # unreadable file; skip instead of crashing in cvtColor.
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=(20, 20))

    # Left panel: original image, titled with its transcription.
    plt.subplot(1, 2, 1)
    plt.title(train_df['label'][i])
    plt.imshow(img)

    # Right panel: sharpened image for visual comparison.
    sharpened = cv2.filter2D(img, -1, kernel_sharpening)
    plt.subplot(1, 2, 2)
    plt.title(train_df['label'][i])
    plt.imshow(sharpened)
    plt.show()
import string

max_label_len = 0  # running maximum transcription length, updated while loading data

#char_list = "!\"#&@'()*+,-./0123456789:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
# Character inventory for the output layer (one class per char + CTC blank).
# BUG FIX: string.printable already contains every ASCII letter and digit, so
# appending string.ascii_letters + string.digits created 62 duplicate entries.
# Because encoding uses .index() (first occurrence), those duplicate slots
# could never be produced -- they were 62 dead softmax classes.  De-duplicate
# while preserving first-occurrence order (dict.fromkeys), which keeps every
# existing character's index unchanged.
char_list = ''.join(dict.fromkeys(string.printable + string.ascii_letters + string.digits))
print(char_list, len(char_list))

def encode_to_labels(txt):
    """Encode a transcription string as a list of integer indices into char_list.

    Raises ValueError if txt contains a character outside char_list
    (same behavior as the original .index()-based loop).
    """
    return [char_list.index(chara) for chara in txt]
0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~
abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 162
# Accumulators populated by the data-loading loop further down.
images, labels = [], []
RECORDS_COUNT = train_df.shape[0]

# Training split: preprocessed images, encoded labels, CTC lengths, raw text.
train_images, train_labels = [], []
train_input_length, train_label_length = [], []
train_original_text = []

# Validation split: same structure as the training split.
valid_images, valid_labels = [], []
valid_input_length, valid_label_length = [], []
valid_original_text = []

inputs_length, labels_length = [], []
def process_image(img):
    """
    Convert a grayscale line image to shape (150, 1000, 1) and normalize.

    Steps: resize to a fixed height of 150 px preserving aspect ratio, pad
    with white (255) up to 150x1000 (or squash down to exactly 1000x150 when
    larger), invert so ink is bright on a dark background, scale to [0, 1].
    """
    rows, cols = img.shape  # numpy shape is (height, width)

    # Resize to height 150; width scaled to preserve the aspect ratio.
    target_rows = 150
    scaled_cols = int(cols * (target_rows / rows))
    img = cv2.resize(img, (scaled_cols, target_rows))  # cv2.resize takes (width, height)
    rows, cols = img.shape
    img = img.astype('float32')

    # Pad with white rows at the bottom if shorter than 150.
    # (After the resize above rows == 150, so this is a safety net.)
    if rows < 150:
        pad = np.full((150 - rows, cols), 255, dtype='float32')
        img = np.concatenate((img, pad))
        rows, cols = img.shape

    # Pad with white columns on the right if narrower than 1000.
    # BUG FIX: the original computed `128 - cols` (left over from an older
    # 32x128 model), so np.full raised for every image with 128 < width < 1000
    # and those samples were silently dropped by the caller's bare `except`.
    if cols < 1000:
        pad = np.full((rows, 1000 - cols), 255, dtype='float32')
        img = np.concatenate((img, pad), axis=1)
        rows, cols = img.shape

    # Anything still larger than the target is squashed to exactly 1000x150.
    if cols > 1000 or rows > 150:
        img = cv2.resize(img, (1000, 150))

    img = cv2.subtract(255, img)       # invert: ink becomes bright on dark
    img = np.expand_dims(img, axis=2)  # (150, 1000) -> (150, 1000, 1)
    return img / 255                   # normalize to [0, 1]
%%time
i = 0
# NOTE(review): the loop is driven by lines_train but reads rows of train_df
# via the separate counter `i`; after any skipped image, `index` and `i`
# diverge, so the `index % 8` train/valid split is only approximate.
for index, line in enumerate(lines_train):
    filepath = train_df.loc[i, 'image_path']
    word = train_df.loc[i, 'label']

    # Load and preprocess; skip samples whose preprocessing fails.
    img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
    try:
        img = process_image(img)
    except Exception:
        # Was a bare `except:` -- narrowed so KeyboardInterrupt/SystemExit
        # still propagate instead of being swallowed mid-run.
        i += 1
        continue

    # Encode the transcription into integer class indices.
    label = encode_to_labels(word)

    # Every 8th annotation line goes to validation (~12.5%), rest to training.
    if index % 8 == 0:
        valid_images.append(img)
        valid_labels.append(label)
        valid_input_length.append(150)   # CTC input length = 150 time steps
        valid_label_length.append(len(word))
        valid_original_text.append(word)
    else:
        train_images.append(img)
        train_labels.append(label)
        train_input_length.append(150)
        train_label_length.append(len(word))
        train_original_text.append(word)

    # Track the longest transcription; used later as the padding target.
    if len(word) > max_label_len:
        max_label_len = len(word)

    i += 1
    if i >= RECORDS_COUNT:
        break
Wall time: 35.6 s
# Sanity check: image and label counts must pair up within each split.
len(train_images), len(valid_images), len(train_labels), len(valid_labels)
(10735, 1544, 10735, 1544)
!pip install keras_tqdm
Requirement already satisfied: keras_tqdm in c:\users\dawaaii\anaconda3\lib\site-packages (2.0.1) Requirement already satisfied: tqdm in c:\users\dawaaii\anaconda3\lib\site-packages (from keras_tqdm) (4.59.0) Requirement already satisfied: Keras in c:\users\dawaaii\anaconda3\lib\site-packages (from keras_tqdm) (2.4.3) Requirement already satisfied: pyyaml in c:\users\dawaaii\anaconda3\lib\site-packages (from Keras->keras_tqdm) (5.4.1) Requirement already satisfied: scipy>=0.14 in c:\users\dawaaii\anaconda3\lib\site-packages (from Keras->keras_tqdm) (1.6.2) Requirement already satisfied: numpy>=1.9.1 in c:\users\dawaaii\anaconda3\lib\site-packages (from Keras->keras_tqdm) (1.19.5) Requirement already satisfied: h5py in c:\users\dawaaii\anaconda3\lib\site-packages (from Keras->keras_tqdm) (3.1.0)
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, LSTM, Reshape, BatchNormalization, Input, Conv2D, MaxPool2D, Lambda, Bidirectional, Flatten, Activation, MaxPooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.activations import relu, sigmoid, softmax
import tensorflow.keras.backend as K
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint
#from keras_tqdm import TQDMNotebookCallback
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.python.client import device_lib
# Silence TF1-style info/warning logging.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
# Check all available devices if GPU is available
print(device_lib.list_local_devices())
# log_device_placement=True prints which device each op lands on.
sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 2871999445918458268
]
Device mapping: no known devices.
# Empty list below confirms training ran on CPU only.
tf.config.experimental.list_physical_devices('GPU')
[]
%%time
def _pad_labels(label_seqs):
    # Post-pad every encoded label to the longest observed transcription.
    # The pad value len(char_list) is the class index reserved for the
    # CTC blank token.
    return pad_sequences(label_seqs,
                         maxlen=max_label_len,
                         padding='post',
                         value=len(char_list))

train_padded_label = _pad_labels(train_labels)
valid_padded_label = _pad_labels(valid_labels)
Wall time: 87.7 ms
%%time
# Convert the accumulated Python lists to numpy arrays for model.fit.
train_images, train_input_length, train_label_length = map(
    np.asarray, (train_images, train_input_length, train_label_length))
valid_images, valid_input_length, valid_label_length = map(
    np.asarray, (valid_images, valid_input_length, valid_label_length))
Wall time: 4.72 s
# ---- Convolutional feature extractor ----
# Input: one preprocessed (150, 1000, 1) grayscale line image.
# Every pooling layer uses pool_size=(1, 2): only the WIDTH is halved, so the
# 150-row axis survives unchanged and later becomes the 150 CTC time steps.
# NOTE(review): treating image ROWS as the time axis is unusual for OCR
# (columns normally map to time) -- confirm this is intentional.
input_data = Input(shape=(150, 1000, 1), dtype='float32')
input_data
<KerasTensor: shape=(None, 150, 1000, 1) dtype=float32 (created by layer 'input_1')>
# Stage 1: 64 filters, then width halved -> (150, 500, 64).
inner = Conv2D(64, (3, 5), padding='same', name='conv1', kernel_initializer='he_normal')(input_data)
inner
<KerasTensor: shape=(None, 150, 1000, 64) dtype=float32 (created by layer 'conv1')>
inner = BatchNormalization()(inner)
inner
<KerasTensor: shape=(None, 150, 1000, 64) dtype=float32 (created by layer 'batch_normalization')>
inner = Activation('relu')(inner)
inner
<KerasTensor: shape=(None, 150, 1000, 64) dtype=float32 (created by layer 'activation')>
inner = MaxPooling2D(pool_size=(1, 2), name='max1')(inner) # (None,64, 32, 64)
inner
<KerasTensor: shape=(None, 150, 500, 64) dtype=float32 (created by layer 'max1')>
# Stage 2: 128 filters -> (150, 250, 128).
inner = Conv2D(128, (3, 5), padding='same', name='conv2', kernel_initializer='he_normal')(inner) # (None, 64, 32, 128)
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(1, 2), name='max2')(inner)
inner
<KerasTensor: shape=(None, 150, 250, 128) dtype=float32 (created by layer 'max2')>
# Stage 3: two 256-filter convs -> (150, 125, 256).
inner = Conv2D(256, (3, 5), padding='same', name='conv3', kernel_initializer='he_normal')(inner) # (None, 32, 16, 256)
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = Conv2D(256, (3, 5), padding='same', name='conv4', kernel_initializer='he_normal')(inner) # (None, 32, 16, 256)
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(1, 2), name='max3')(inner)
inner
<KerasTensor: shape=(None, 150, 125, 256) dtype=float32 (created by layer 'max3')>
# Stage 4: two 512-filter convs -> (150, 62, 512).
inner = Conv2D(512, (3, 5), padding='same', name='conv5', kernel_initializer='he_normal')(inner) # (None, 32, 8, 512)
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = Conv2D(512, (3, 5), padding='same', name='conv6')(inner) # (None, 32, 8, 512)
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(1, 2), name='max4')(inner)
inner
<KerasTensor: shape=(None, 150, 62, 512) dtype=float32 (created by layer 'max4')>
# Stage 5: final 512-filter conv (no pooling) -> (150, 62, 512).
inner = Conv2D(512, (3, 5), padding='same', kernel_initializer='he_normal', name='con7')(inner) # (None, 32, 4, 512)
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner
<KerasTensor: shape=(None, 150, 62, 512) dtype=float32 (created by layer 'activation_6')>
# Collapse width x channels into one feature vector per time step, then
# bottleneck to 64 features: (150, 62*512=31744) -> (150, 64).
inner = Reshape(target_shape=((150, 62*512)), name='reshape')(inner) # (None, 32, 2048)
inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)
inner
<KerasTensor: shape=(None, 150, 64) dtype=float32 (created by layer 'dense1')>
# Import the functional merge helpers from tensorflow.keras, matching the
# rest of the model.  The original `from keras.layers.merge import ...`
# mixed standalone Keras with tf.keras (two different implementations), and
# `keras.layers.merge` no longer exists in modern Keras releases.
from tensorflow.keras.layers import add, concatenate
# ---- Recurrent sequence model: two hand-built bidirectional LSTM layers ----
# The backward direction is implemented as go_backwards=True plus a K.reverse
# of the output sequence so it aligns time-step-for-time-step with the
# forward direction (what Bidirectional() would do internally).
lstm_1 = LSTM(256, return_sequences=True, kernel_initializer='he_normal', name='lstm1')(inner) # (None, 32, 512)
lstm_1b = LSTM(256, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm1_b')(inner)
reversed_lstm_1b = Lambda(lambda inputTensor: K.reverse(inputTensor, axes=1)) (lstm_1b)
reversed_lstm_1b
<KerasTensor: shape=(None, 150, 256) dtype=float32 (created by layer 'lambda')>
# First BLSTM merges the two directions by element-wise sum -> (150, 256).
lstm1_merged = add([lstm_1, reversed_lstm_1b]) # (None, 32, 512)
lstm1_merged = BatchNormalization()(lstm1_merged)
lstm_2 = LSTM(256, return_sequences=True, kernel_initializer='he_normal', name='lstm2')(lstm1_merged)
lstm_2b = LSTM(256, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm_b')(lstm1_merged)
reversed_lstm_2b= Lambda(lambda inputTensor: K.reverse(inputTensor, axes=1)) (lstm_2b)
reversed_lstm_2b
<KerasTensor: shape=(None, 150, 256) dtype=float32 (created by layer 'lambda_1')>
# Second BLSTM merges by concatenation -> (150, 512).
lstm2_merged = concatenate([lstm_2, reversed_lstm_2b]) # (None, 32, 1024)
lstm2_merged = BatchNormalization()(lstm2_merged)
#Dense(len(char_list)+1, activation = 'softmax')(blstm_2)
# Per-time-step class scores: one unit per character plus one for the CTC blank.
inner = Dense(len(char_list)+1, kernel_initializer='he_normal',name='dense2')(lstm2_merged) #(None, 32, 80)
y_pred = Activation('softmax', name='softmax')(inner)
y_pred
<KerasTensor: shape=(None, 150, 163) dtype=float32 (created by layer 'softmax')>
# Inference-time model: image in -> per-time-step character probabilities out.
# (The training model with the CTC loss head is built further down.)
act_model = Model(input_data, y_pred)
act_model.summary()
Model: "model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 150, 1000, 1 0
__________________________________________________________________________________________________
conv1 (Conv2D) (None, 150, 1000, 64 1024 input_1[0][0]
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 150, 1000, 64 256 conv1[0][0]
__________________________________________________________________________________________________
activation (Activation) (None, 150, 1000, 64 0 batch_normalization[0][0]
__________________________________________________________________________________________________
max1 (MaxPooling2D) (None, 150, 500, 64) 0 activation[0][0]
__________________________________________________________________________________________________
conv2 (Conv2D) (None, 150, 500, 128 123008 max1[0][0]
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 150, 500, 128 512 conv2[0][0]
__________________________________________________________________________________________________
activation_1 (Activation) (None, 150, 500, 128 0 batch_normalization_1[0][0]
__________________________________________________________________________________________________
max2 (MaxPooling2D) (None, 150, 250, 128 0 activation_1[0][0]
__________________________________________________________________________________________________
conv3 (Conv2D) (None, 150, 250, 256 491776 max2[0][0]
__________________________________________________________________________________________________
batch_normalization_2 (BatchNor (None, 150, 250, 256 1024 conv3[0][0]
__________________________________________________________________________________________________
activation_2 (Activation) (None, 150, 250, 256 0 batch_normalization_2[0][0]
__________________________________________________________________________________________________
conv4 (Conv2D) (None, 150, 250, 256 983296 activation_2[0][0]
__________________________________________________________________________________________________
batch_normalization_3 (BatchNor (None, 150, 250, 256 1024 conv4[0][0]
__________________________________________________________________________________________________
activation_3 (Activation) (None, 150, 250, 256 0 batch_normalization_3[0][0]
__________________________________________________________________________________________________
max3 (MaxPooling2D) (None, 150, 125, 256 0 activation_3[0][0]
__________________________________________________________________________________________________
conv5 (Conv2D) (None, 150, 125, 512 1966592 max3[0][0]
__________________________________________________________________________________________________
batch_normalization_4 (BatchNor (None, 150, 125, 512 2048 conv5[0][0]
__________________________________________________________________________________________________
activation_4 (Activation) (None, 150, 125, 512 0 batch_normalization_4[0][0]
__________________________________________________________________________________________________
conv6 (Conv2D) (None, 150, 125, 512 3932672 activation_4[0][0]
__________________________________________________________________________________________________
batch_normalization_5 (BatchNor (None, 150, 125, 512 2048 conv6[0][0]
__________________________________________________________________________________________________
activation_5 (Activation) (None, 150, 125, 512 0 batch_normalization_5[0][0]
__________________________________________________________________________________________________
max4 (MaxPooling2D) (None, 150, 62, 512) 0 activation_5[0][0]
__________________________________________________________________________________________________
con7 (Conv2D) (None, 150, 62, 512) 3932672 max4[0][0]
__________________________________________________________________________________________________
batch_normalization_6 (BatchNor (None, 150, 62, 512) 2048 con7[0][0]
__________________________________________________________________________________________________
activation_6 (Activation) (None, 150, 62, 512) 0 batch_normalization_6[0][0]
__________________________________________________________________________________________________
reshape (Reshape) (None, 150, 31744) 0 activation_6[0][0]
__________________________________________________________________________________________________
dense1 (Dense) (None, 150, 64) 2031680 reshape[0][0]
__________________________________________________________________________________________________
lstm1_b (LSTM) (None, 150, 256) 328704 dense1[0][0]
__________________________________________________________________________________________________
lstm1 (LSTM) (None, 150, 256) 328704 dense1[0][0]
__________________________________________________________________________________________________
lambda (Lambda) (None, 150, 256) 0 lstm1_b[0][0]
__________________________________________________________________________________________________
add (Add) (None, 150, 256) 0 lstm1[0][0]
lambda[0][0]
__________________________________________________________________________________________________
batch_normalization_7 (BatchNor (None, 150, 256) 1024 add[0][0]
__________________________________________________________________________________________________
lstm_b (LSTM) (None, 150, 256) 525312 batch_normalization_7[0][0]
__________________________________________________________________________________________________
lstm2 (LSTM) (None, 150, 256) 525312 batch_normalization_7[0][0]
__________________________________________________________________________________________________
lambda_1 (Lambda) (None, 150, 256) 0 lstm_b[0][0]
__________________________________________________________________________________________________
concatenate (Concatenate) (None, 150, 512) 0 lstm2[0][0]
lambda_1[0][0]
__________________________________________________________________________________________________
batch_normalization_8 (BatchNor (None, 150, 512) 2048 concatenate[0][0]
__________________________________________________________________________________________________
dense2 (Dense) (None, 150, 163) 83619 batch_normalization_8[0][0]
__________________________________________________________________________________________________
softmax (Activation) (None, 150, 163) 0 dense2[0][0]
==================================================================================================
Total params: 15,266,403
Trainable params: 15,260,387
Non-trainable params: 6,016
__________________________________________________________________________________________________
# # input with shape of height=150 and width=1000
# inputs = Input(shape=(150,1000,1))
# # convolution layer with kernel size (3,3)
# conv_1 = Conv2D(64, (3,5), activation = 'relu', padding='same')(inputs)
# # poolig layer with kernel size (2,2)
# pool_1 = MaxPooling2D((2,2), padding='same')(conv_1)
# conv_2 = Conv2D(128, (3,5), activation = 'relu', padding='same')(pool_1)
# pool_2 = MaxPooling2D((2,2), padding='same')(conv_2)
# conv_3 = Conv2D(256, (3,5), activation = 'relu', padding='same')(pool_2)
# conv_4 = Conv2D(256, (3,5), activation = 'relu', padding='same')(conv_3)
# # poolig layer with kernel size (2,1)
# pool_4 = MaxPooling2D((2,2), padding='same')(conv_4)
# conv_5 = Conv2D(512, (3,5), activation = 'relu', padding='same')(pool_4)
# # Batch normalization layer
# batch_norm_5 = BatchNormalization()(conv_5)
# conv_6 = Conv2D(512, (3,5), activation = 'relu', padding='same')(batch_norm_5)
# batch_norm_6 = BatchNormalization()(conv_6)
# pool_6 = MaxPooling2D((2,2), padding='same')(batch_norm_6)
# conv_7 = Conv2D(512, (3,5), activation = 'relu')(pool_6)
# squeezed = Flatten()(conv_7)
# # bidirectional LSTM layers with units=128
# blstm_1 = LSTM(256, return_sequences=True, dropout = 0.2)(squeezed)
# blstm_2 = LSTM(256, return_sequences=True, dropout = 0.2)(blstm_1)
# outputs = Dense(len(char_list)+1, activation = 'softmax')(blstm_2)
# # model to be used at test time
# act_model = Model(inputs, outputs)
# Extra model inputs used only to compute the CTC loss during training:
# the padded label sequences plus the true input/label lengths per sample.
the_labels = Input(name='the_labels', shape=[max_label_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
def ctc_lambda_func(args):
    """Unpack the Lambda layer's inputs and return the per-sample CTC loss."""
    predictions, targets, prediction_lengths, target_lengths = args
    # K.ctc_batch_cost expects (labels, y_pred, input_length, label_length).
    return K.ctc_batch_cost(targets, predictions, prediction_lengths, target_lengths)
# CTC loss is computed INSIDE the graph; the training model's output IS the loss.
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, the_labels, input_length, label_length])
#model to be used at training time
model = Model(inputs=[input_data, the_labels, input_length, label_length], outputs=loss_out)
batch_size = 8
epochs = 5
e = str(epochs)  # NOTE(review): unused below
optimizer_name = 'adam'
# The 'ctc' output already is the loss value, so the loss function simply
# passes y_pred through.  NOTE(review): the 'accuracy' metric is meaningless
# under this dummy loss (it compares loss values against the zero dummy
# targets) -- the constant 0.0000 accuracy in the training log is expected.
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer = optimizer_name, metrics=['accuracy'])
# Checkpoint filename encodes optimizer, record count, epochs and split sizes.
filepath="{}o-{}r-{}e-{}t-{}v.hdf5".format(optimizer_name,
                                           str(RECORDS_COUNT),
                                           str(epochs),
                                           str(train_images.shape[0]),
                                           str(valid_images.shape[0]))
# Keep only the best weights by validation loss.
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')
callbacks_list = [checkpoint]
# Train.  x carries all four CTC inputs; y is a dummy zero vector because the
# model's 'ctc' output already is the loss (see the pass-through loss above).
history = model.fit(x=[train_images, train_padded_label, train_input_length, train_label_length],
                    y=np.zeros(len(train_images)),
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=([valid_images, valid_padded_label, valid_input_length, valid_label_length], [np.zeros(len(valid_images))]),
                    verbose=1,callbacks=callbacks_list)
Epoch 1/5 1342/1342 [==============================] - 72727s 54s/step - loss: 158.3878 - accuracy: 0.0000e+00 - val_loss: 139.0172 - val_accuracy: 0.0000e+00 Epoch 00001: val_loss improved from inf to 139.01720, saving model to adamo-12686r-5e-10735t-1544v.hdf5
C:\Users\dawaaii\anaconda3\lib\site-packages\tensorflow\python\keras\utils\generic_utils.py:494: CustomMaskWarning: Custom mask layers require a config and must override get_config. When loading, the custom mask layer must be passed to the custom_objects argument.
warnings.warn('Custom mask layers require a config and must override '
Epoch 2/5 455/1342 [=========>....................] - ETA: 13:03:29 - loss: 136.0808 - accuracy: 0.0000e+00
# prediction = act_model.predict(train_images[150:170])
# # use CTC decoder
# decoded = K.ctc_decode(prediction,
# input_length=np.ones(prediction.shape[0]) * prediction.shape[1],
# greedy=True)[0][0]
# out = K.get_value(decoded)
# # see the results
# for i, x in enumerate(out):
# print("original_text = ", train_original_text[150+i])
# print("predicted text = ", end = '')
# for p in x:
# if int(p) != -1:
# print(char_list[int(p)], end = '')
# plt.imshow(train_images[150+i].reshape(32,128), cmap=plt.cm.gray)
# plt.show()
# print('\n')